{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "material-defeat",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import geopandas as gpd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "reliable-cookbook",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ICE-Spark baseline comparison\n",
    "# ST join task for S2 self-intersection (1 month, 3 months, and 6 months)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "detailed-idaho",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the 3-month Sentinel self-intersection shapefile used for the baselines.\n",
    "Sentinel_gdf = gpd.read_file('../../Metadata/for_baselines/sentinel_selfintersect_3months.shp')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "descending-postage",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>s2_index</th>\n",
       "      <th>S2_max_tim</th>\n",
       "      <th>S2_min_tim</th>\n",
       "      <th>S2_join_ti</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20201001T002611_20201001T002613_T01WCS</td>\n",
       "      <td>1.601528e+09</td>\n",
       "      <td>1.601525e+09</td>\n",
       "      <td>1601526424</td>\n",
       "      <td>POLYGON ((177.97789 69.07297, 177.97782 69.072...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20201001T002611_20201001T002613_T01WCV</td>\n",
       "      <td>1.601528e+09</td>\n",
       "      <td>1.601525e+09</td>\n",
       "      <td>1601526390</td>\n",
       "      <td>POLYGON ((-179.62808 72.08202, -179.62812 72.0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20201001T002611_20201001T002613_T58WFA</td>\n",
       "      <td>1.601528e+09</td>\n",
       "      <td>1.601525e+09</td>\n",
       "      <td>1601526469</td>\n",
       "      <td>POLYGON ((169.90690 67.45342, 168.17129 67.496...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20201001T002611_20201001T002613_T58WFB</td>\n",
       "      <td>1.601528e+09</td>\n",
       "      <td>1.601525e+09</td>\n",
       "      <td>1601526455</td>\n",
       "      <td>POLYGON ((170.00452 69.33748, 170.00464 69.337...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20201001T002611_20201001T002613_T58WFC</td>\n",
       "      <td>1.601528e+09</td>\n",
       "      <td>1.601525e+09</td>\n",
       "      <td>1601526444</td>\n",
       "      <td>POLYGON ((170.42145 69.68937, 170.42152 69.689...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50575</th>\n",
       "      <td>20201230T234509_20201230T234844_T50CNU</td>\n",
       "      <td>1.609392e+09</td>\n",
       "      <td>1.609388e+09</td>\n",
       "      <td>1609390143</td>\n",
       "      <td>POLYGON ((116.99905 -78.46057, 116.99899 -78.4...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50576</th>\n",
       "      <td>20201230T234509_20201230T234844_T50CNV</td>\n",
       "      <td>1.609392e+09</td>\n",
       "      <td>1.609388e+09</td>\n",
       "      <td>1609390134</td>\n",
       "      <td>POLYGON ((119.35323 -76.80304, 120.09493 -76.9...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50577</th>\n",
       "      <td>20201230T234509_20201230T234844_T51CVP</td>\n",
       "      <td>1.609392e+09</td>\n",
       "      <td>1.609388e+09</td>\n",
       "      <td>1609390139</td>\n",
       "      <td>POLYGON ((118.52874 -78.42619, 118.70826 -77.9...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50578</th>\n",
       "      <td>20201230T234509_20201230T234844_T51CVQ</td>\n",
       "      <td>1.609392e+09</td>\n",
       "      <td>1.609388e+09</td>\n",
       "      <td>1609390132</td>\n",
       "      <td>POLYGON ((119.17900 -76.76710, 119.34475 -76.8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50579</th>\n",
       "      <td>20201230T234509_20201230T234844_T51CWP</td>\n",
       "      <td>1.609392e+09</td>\n",
       "      <td>1.609388e+09</td>\n",
       "      <td>1609390131</td>\n",
       "      <td>POLYGON ((122.99917 -77.52778, 123.00168 -77.5...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>50580 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     s2_index    S2_max_tim    S2_min_tim  \\\n",
       "0      20201001T002611_20201001T002613_T01WCS  1.601528e+09  1.601525e+09   \n",
       "1      20201001T002611_20201001T002613_T01WCV  1.601528e+09  1.601525e+09   \n",
       "2      20201001T002611_20201001T002613_T58WFA  1.601528e+09  1.601525e+09   \n",
       "3      20201001T002611_20201001T002613_T58WFB  1.601528e+09  1.601525e+09   \n",
       "4      20201001T002611_20201001T002613_T58WFC  1.601528e+09  1.601525e+09   \n",
       "...                                       ...           ...           ...   \n",
       "50575  20201230T234509_20201230T234844_T50CNU  1.609392e+09  1.609388e+09   \n",
       "50576  20201230T234509_20201230T234844_T50CNV  1.609392e+09  1.609388e+09   \n",
       "50577  20201230T234509_20201230T234844_T51CVP  1.609392e+09  1.609388e+09   \n",
       "50578  20201230T234509_20201230T234844_T51CVQ  1.609392e+09  1.609388e+09   \n",
       "50579  20201230T234509_20201230T234844_T51CWP  1.609392e+09  1.609388e+09   \n",
       "\n",
       "       S2_join_ti                                           geometry  \n",
       "0      1601526424  POLYGON ((177.97789 69.07297, 177.97782 69.072...  \n",
       "1      1601526390  POLYGON ((-179.62808 72.08202, -179.62812 72.0...  \n",
       "2      1601526469  POLYGON ((169.90690 67.45342, 168.17129 67.496...  \n",
       "3      1601526455  POLYGON ((170.00452 69.33748, 170.00464 69.337...  \n",
       "4      1601526444  POLYGON ((170.42145 69.68937, 170.42152 69.689...  \n",
       "...           ...                                                ...  \n",
       "50575  1609390143  POLYGON ((116.99905 -78.46057, 116.99899 -78.4...  \n",
       "50576  1609390134  POLYGON ((119.35323 -76.80304, 120.09493 -76.9...  \n",
       "50577  1609390139  POLYGON ((118.52874 -78.42619, 118.70826 -77.9...  \n",
       "50578  1609390132  POLYGON ((119.17900 -76.76710, 119.34475 -76.8...  \n",
       "50579  1609390131  POLYGON ((122.99917 -77.52778, 123.00168 -77.5...  \n",
       "\n",
       "[50580 rows x 5 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the loaded frame to inspect its columns and row count.\n",
    "Sentinel_gdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "adjusted-photograph",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left input of the spatial self-join: only the columns the join and its filter use.\n",
    "Sentinel_copy_tojoin_left = Sentinel_gdf[['s2_index', 'S2_join_ti', 'geometry']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "committed-payday",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Right input of the spatial self-join: the same three columns (renamed in the next cell).\n",
    "Sentinel_copy_tojoin = Sentinel_gdf[['s2_index', 'S2_join_ti', 'geometry']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "vietnamese-alignment",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the right-hand id/time columns so they stay distinguishable from the left frame's.\n",
    "Sentinel_copy_tojoin = Sentinel_copy_tojoin.rename(\n",
    "    columns={'s2_index': 's2_index_copy', 'S2_join_ti': 'S2_time'}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "periodic-recall",
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_pandas_sjoin(delay_length, result_csv='S2_selfinter_baseline.csv'):\n",
    "    \"\"\"Time a GeoPandas spatio-temporal self-join of the Sentinel frames.\n",
    "\n",
    "    Spatially joins ``Sentinel_copy_tojoin_left`` with ``Sentinel_copy_tojoin``\n",
    "    (``gpd.sjoin`` defaults), keeps the pairs of different scenes whose\n",
    "    timestamps differ by at most ``delay_length * 3600``, writes the matching\n",
    "    id pairs to ``result_csv`` and the elapsed wall-clock time to\n",
    "    ``ICESpark_selfinter_baseline_{delay_length}_delay_geopandas.txt``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    delay_length : int or float\n",
    "        Half-width of the temporal window; it is multiplied by 3600 before\n",
    "        being applied to the timestamps (presumably hours over epoch\n",
    "        seconds -- TODO confirm the unit of ``S2_join_ti``).\n",
    "    result_csv : str, optional\n",
    "        Destination of the matched pairs. The default keeps the historical\n",
    "        file name, which every call overwrites; pass a distinct path per\n",
    "        delay to keep all results.\n",
    "    \"\"\"\n",
    "    start_spatial_time = time.time()\n",
    "    S2_self_sjoin = gpd.sjoin(Sentinel_copy_tojoin_left, Sentinel_copy_tojoin)\n",
    "    print(S2_self_sjoin.shape)\n",
    "    print(S2_self_sjoin.columns)\n",
    "\n",
    "    # Temporal window centred on the left scene's timestamp.\n",
    "    half_window = delay_length * 3600\n",
    "    window_start = S2_self_sjoin['S2_join_ti'] - half_window\n",
    "    window_end = S2_self_sjoin['S2_join_ti'] + half_window\n",
    "\n",
    "    # Test the RIGHT scene's time (S2_time) against that window. Comparing\n",
    "    # S2_join_ti with its own +/- bounds is always true and filters nothing.\n",
    "    # Series.between is inclusive on both ends by default.\n",
    "    in_window = S2_self_sjoin['S2_time'].between(window_start, window_end)\n",
    "    # Drop the trivial matches of a scene with itself.\n",
    "    different_scene = S2_self_sjoin['s2_index'] != S2_self_sjoin['s2_index_copy']\n",
    "    S3I2_sjoin = S2_self_sjoin[in_window & different_scene]\n",
    "\n",
    "    S3I2_sjoin[['s2_index', 's2_index_copy']].to_csv(result_csv)\n",
    "\n",
    "    # The recorded time covers the join, the filter and the CSV export.\n",
    "    with open('ICESpark_selfinter_baseline_{}_delay_geopandas.txt'.format(str(delay_length)), 'w') as f:\n",
    "        f.write('time for 3 month ST join using geopanas:')\n",
    "        f.write(str(time.time() - start_spatial_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "alternative-healing",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "fluid-gibraltar",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(12966198, 6)\n",
      "Index(['s2_index', 'S2_join_ti', 'geometry', 'index_right', 's2_index_copy',\n",
      "       'S2_time'],\n",
      "      dtype='object')\n",
      "(12966198, 6)\n",
      "Index(['s2_index', 'S2_join_ti', 'geometry', 'index_right', 's2_index_copy',\n",
      "       'S2_time'],\n",
      "      dtype='object')\n",
      "(12966198, 6)\n",
      "Index(['s2_index', 'S2_join_ti', 'geometry', 'index_right', 's2_index_copy',\n",
      "       'S2_time'],\n",
      "      dtype='object')\n",
      "(12966198, 6)\n",
      "Index(['s2_index', 'S2_join_ti', 'geometry', 'index_right', 's2_index_copy',\n",
      "       'S2_time'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "# Run the benchmark once per delay value, largest first.\n",
    "for delay_i in (6, 3, 1, 0.5):\n",
    "    test_pandas_sjoin(delay_i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "visible-citation",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "surprising-rhythm",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
